library(tidyverse)
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages -----------------------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats
library(vcfR)

   *****       ***   vcfR   ***       *****
   This is vcfR 1.5.0 
     browseVignettes('vcfR') # Documentation
     citation('vcfR') # Citation
   *****       *****      *****       *****
format.names <- str_split(data$FORMAT[[1]],pattern=":")[[1]]
data2 <- data %>%
  select(-INFO,-ID,-FILTER, -FORMAT) %>%
  mutate(S95=str_replace_all(S95,"([^0-9]|$)\\.","\\1NA")) %>% # convert missing data "." into NA
  mutate(S96=str_replace_all(S96,"([^0-9]|$)\\.","\\1NA")) %>%
  separate(S95,sep=":",into = str_c("S95_",format.names), convert = TRUE) %>%
  separate(S96,sep=":",into=str_c("S96_",format.names), convert = TRUE) %>%
  filter(!(S95_GT=="1/1" & S96_GT=="1/1")) %>%
  filter(!(S95_GT=="0/0" & S96_GT=="0/0")) %>%
  select(-ends_with("DPR"))
data2

calculate allele frequencies and polarize based on S95 for each SNP

data2 <- data2 %>%
  mutate(S95_alt_AF=S95_AO/S95_DP,
         S96_alt_AF=S96_AO/S96_DP,
         S95_alt_major=S95_alt_AF > 0.5)
data2 <- data2 %>%
  mutate(S95_AF_polar=ifelse(S95_alt_major,S95_alt_AF,1-S95_alt_AF),
         S96_AF_polar=ifelse(S95_alt_major,S96_alt_AF,1-S96_alt_AF))
data2         

rolling mean

plot!

plA.data <- pl.data2 %>% filter(str_detect(CHROM,"chrA"),!str_detect(CHROM,"random")) 
plA <- ggplot(plA.data,aes(x=POS,y=AF_100,color=population,shape=population)) +
  facet_wrap(~CHROM, scales = "free_x",ncol=2) +
  #geom_point(alpha=.1) +
  geom_line()
plA

plot!

plC.data <- pl.data2 %>% filter(str_detect(CHROM,"chrC"),!str_detect(CHROM,"random")) 
plC <- ggplot(plC.data,aes(x=POS,y=AF_100,color=population,shape=population)) +
  facet_wrap(~CHROM, scales = "free_x",ncol=2) +
  #geom_point(alpha=.1) +
  geom_line()
plC

plot one per chromosome

chrom.names <- unique(pl.data2$CHROM) %>% str_subset("[0-9]$")
chrom.names
 [1] "chrA01" "chrA02" "chrA03" "chrA04" "chrA05" "chrA06" "chrA07" "chrA08" "chrA09" "chrA10" "chrC01" "chrC02" "chrC03" "chrC04"
[15] "chrC05" "chrC06" "chrC07" "chrC08" "chrC09"
for (chr in chrom.names) {
  pl.data2 %>% filter(str_detect(CHROM,chr)) %>%
    ggplot(aes(x=POS,y=AF_100,color=population)) +
    geom_line() +
    ggtitle(chr) +
    ylim(0,1)
  ggsave(str_c(chr,".png"),height = 8, width = 12)
}

absolute difference in allele frequency

data3 <- data2 %>%
  group_by(CHROM) %>%
  mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
         abs_delta_af_100=roll_mean(abs_delta_af,n=100,fill=NaN),
         abs_delta_af_500=roll_mean(abs_delta_af,n=500,fill=NaN)
         )
data3

plot one per chromosome

for (chr in chrom.names) {
  data3 %>% filter(str_detect(CHROM,chr)) %>%
    ggplot(aes(x=POS,y=abs_delta_af_100)) +
    geom_line() +
    ggtitle(chr) +
    ylim(0,1)
  ggsave(str_c("Delta_af",chr,".png"),height = 8, width = 12)
}

Try filtering for F2 good snps

F2snps <- read_tsv("Final_F2_SNP_Sites.tab")
Parsed with column specification:
cols(
  CHROM = col_character(),
  POS = col_integer()
)
data2.F2.filtered <- semi_join(data2,F2snps)
Joining, by = c("CHROM", "POS")
dim(data2)
[1] 1926088      26
dim(F2snps)
[1] 18226     2
dim(data2.F2.filtered)
[1] 1997   26
data3.F2 <- data2.F2.filtered %>%
  group_by(CHROM) %>%
  mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
         abs_delta_af_10=roll_mean(abs_delta_af,n=10,fill=NaN)
         )
data3.F2
for (chr in chrom.names) {
  data3.F2 %>% filter(str_detect(CHROM,chr)) %>%
    ggplot(aes(x=POS,y=abs_delta_af_10)) +
    geom_line() +
    ggtitle(chr) +
    ylim(0,1)
  ggsave(str_c("Delta_af_F2_filter",chr,".png"),height = 8, width = 12)
}

filter on 505

snps.505 <- read_csv("../kiat/Breeding_Prediction_App/Data/505/505_Genotype_170K_SNPs.csv.gz") %>%
  select(CHROM,POS)
Duplicated column names deduplicated: 'ID_505_K200' => 'ID_505_K200_1' [21]Parsed with column specification:
cols(
  .default = col_integer(),
  MARKER = col_character(),
  CHROM = col_character(),
  REF = col_character(),
  ALT = col_character()
)
See spec(...) for full column specifications.
head(snps.505)
data2.505.filtered <- semi_join(data2,snps.505)
Joining, by = c("CHROM", "POS")
dim(data2)
[1] 1926088      26
dim(snps.505)
[1] 174397      2
dim(data2.505.filtered)
[1] 14116    26
data3.505 <- data2.505.filtered %>%
  group_by(CHROM) %>%
  mutate(abs_delta_af = abs(S95_AO/S95_DP - S96_AO/S96_DP),
         abs_delta_af_10=roll_mean(abs_delta_af,n=10,fill=NaN)
         )
data3.505
for (chr in chrom.names) {
  data3.505 %>% filter(str_detect(CHROM,chr)) %>%
    ggplot(aes(x=POS,y=abs_delta_af_10)) +
    geom_point(aes(y=abs_delta_af),shape=1) +
    geom_line(color="blue") +
    ggtitle(chr) +
    ylim(0,1)
  ggsave(str_c("Delta_af_505_filter",chr,".png"),height = 8, width = 12)
}
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHZjZlIpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShSY3BwUm9sbCkKYGBgCgpgYGB7cn0KY25hbWVzIDwtIHN0cl9zcGxpdCgiQ0hST00gIFBPUyAgICAgSUQgICAgICBSRUYgICAgIEFMVCAgICAgUVVBTCAgICBGSUxURVIgIElORk8gICAgRk9STUFUICBTOTUgICAgIFM5NiIscGF0dGVybj1ib3VuZGFyeSgid29yZCIpKVtbMV1dCgoKZGF0YSA8LSByZWFkX3RzdigiUzk1Uzk2X2ZpbHRlcmVkLnZjZi5yZWNvZGUudmNmIixjb21tZW50PSIjIixjb2xfbmFtZXMgPSBjbmFtZXMpCmRhdGEKYGBgCgpgYGB7cn0KZm9ybWF0Lm5hbWVzIDwtIHN0cl9zcGxpdChkYXRhJEZPUk1BVFtbMV1dLHBhdHRlcm49IjoiKVtbMV1dCgpkYXRhMiA8LSBkYXRhICU+JQogIHNlbGVjdCgtSU5GTywtSUQsLUZJTFRFUiwgLUZPUk1BVCkgJT4lCiAgbXV0YXRlKFM5NT1zdHJfcmVwbGFjZV9hbGwoUzk1LCIoW14wLTldfCQpXFwuIiwiXFwxTkEiKSkgJT4lICMgY29udmVydCBtaXNzaW5nIGRhdGEgIi4iIGludG8gTkEKICBtdXRhdGUoUzk2PXN0cl9yZXBsYWNlX2FsbChTOTYsIihbXjAtOV18JClcXC4iLCJcXDFOQSIpKSAlPiUKICBzZXBhcmF0ZShTOTUsc2VwPSI6IixpbnRvID0gc3RyX2MoIlM5NV8iLGZvcm1hdC5uYW1lcyksIGNvbnZlcnQgPSBUUlVFKSAlPiUKICBzZXBhcmF0ZShTOTYsc2VwPSI6IixpbnRvPXN0cl9jKCJTOTZfIixmb3JtYXQubmFtZXMpLCBjb252ZXJ0ID0gVFJVRSkgJT4lCiAgZmlsdGVyKCEoUzk1X0dUPT0iMS8xIiAmIFM5Nl9HVD09IjEvMSIpKSAlPiUKICBmaWx0ZXIoIShTOTVfR1Q9PSIwLzAiICYgUzk2X0dUPT0iMC8wIikpICU+JQogIHNlbGVjdCgtZW5kc193aXRoKCJEUFIiKSkKCmRhdGEyCmBgYAoKCgpjYWxjdWxhdGUgYWxsZWxlIGZyZXF1ZW5jaWVzIGFuZCBwb2xhcml6ZSBiYXNlZCBvbiBTOTUKZm9yIGVhY2ggU05QIApgYGB7cn0KZGF0YTIgPC0gZGF0YTIgJT4lCiAgbXV0YXRlKFM5NV9hbHRfQUY9Uzk1X0FPL1M5NV9EUCwKICAgICAgICAgUzk2X2FsdF9BRj1TOTZfQU8vUzk2X0RQLAogICAgICAgICBTOTVfYWx0X21ham9yPVM5NV9hbHRfQUYgPiAwLjUpCmRhdGEyIDwtIGRhdGEyICU+JQogIG11dGF0ZShTOTVfQUZfcG9sYXI9aWZlbHNlKFM5NV9hbHRfbWFqb3IsUzk1X2FsdF9BRiwxLVM5NV9hbHRfQUYpLAogICAgICAgICBTOTZfQUZfcG9sYXI9aWZlbHNlKFM5NV9hbHRfbWFqb3IsUzk2X2FsdF9BRiwxLVM5Nl9hbHRfQUYpKQpkYXRhMiAgICAgICAgIApgYGAKCnJvbGxpbmcgbWVhbgpgYGB7cn0KcGwuZGF0YTIgPC0gZGF0YTIgJT4lCiAgZ2F0aGVyKGtleT0icG9wdWxhdGlvbiIsdmFsdWU9ImFsbGVsZS5mcmVxdWVuY3kiLFM5NV9BRl9wb2xhcixTOTZfQUZfcG9sYXIpICU+JQogIGFycmFuZ2UoQ0hST00sUE9TKSAlPiUKICBncm91cF9ieShwb3B1bGF0aW9uLENIUk9NKSAlPiUKICBtdXRhdGUoQUZfMTAwPXJvbGxfbWVhbihhbGxlbGUuZnJlcXVlbmN5LG49MTAwLGZpbGw9TmFOKSkKcGwuZGF0YTIKYGBgCgoKcGxvdCEKYGBge3J9CnBsQS5kYXRhIDwtIHBsLmRhdGEyICU+JSBmaWx0ZXIoc3RyX2RldGVjdChDSFJPTSwiY2hyQSIpLCFzdHJfZGV0ZWN0KENIUk9NLCJyYW5kb20iKSkgCmBgYAoKYGBge3J9CnBsQSA8LSBnZ3Bsb3QocGxBLmRhdGEsYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24sc2hhcGU9cG9wdWxhdGlvbikpICsKICBmYWNldF93cmFwKH5DSFJPTSwgc2NhbGVzID0gImZyZWVfeCIsbmNvbD0yKSArCiAgI2dlb21fcG9pbnQoYWxwaGE9LjEpICsKICBnZW9tX2xpbmUoKQpwbEEKYGBgCgoKcGxvdCEKYGBge3J9CnBsQy5kYXRhIDwtIHBsLmRhdGEyICU+JSBmaWx0ZXIoc3RyX2RldGVjdChDSFJPTSwiY2hyQyIpLCFzdHJfZGV0ZWN0KENIUk9NLCJyYW5kb20iKSkgCmBgYAoKYGBge3J9CnBsQyA8LSBnZ3Bsb3QocGxDLmRhdGEsYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24sc2hhcGU9cG9wdWxhdGlvbikpICsKICBmYWNldF93cmFwKH5DSFJPTSwgc2NhbGVzID0gImZyZWVfeCIsbmNvbD0yKSArCiAgI2dlb21fcG9pbnQoYWxwaGE9LjEpICsKICBnZW9tX2xpbmUoKQpwbEMKYGBgCgpwbG90IG9uZSBwZXIgY2hyb21vc29tZQpgYGB7cn0KY2hyb20ubmFtZXMgPC0gdW5pcXVlKHBsLmRhdGEyJENIUk9NKSAlPiUgc3RyX3N1YnNldCgiWzAtOV0kIikKY2hyb20ubmFtZXMKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBwbC5kYXRhMiAlPiUgZmlsdGVyKHN0cl9kZXRlY3QoQ0hST00sY2hyKSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHg9UE9TLHk9QUZfMTAwLGNvbG9yPXBvcHVsYXRpb24pKSArCiAgICBnZW9tX2xpbmUoKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKGNociwiLnBuZyIpLGhlaWdodCA9IDgsIHdpZHRoID0gMTIpCn0KYGBgCgoKIyMgYWJzb2x1dGUgZGlmZmVyZW5jZSBpbiBhbGxlbGUgZnJlcXVlbmN5CgpgYGB7cn0KZGF0YTMgPC0gZGF0YTIgJT4lCiAgZ3JvdXBfYnkoQ0hST00pICU+JQogIG11dGF0ZShhYnNfZGVsdGFfYWYgPSBhYnMoUzk1X0FPL1M5NV9EUCAtIFM5Nl9BTy9TOTZfRFApLAogICAgICAgICBhYnNfZGVsdGFfYWZfMTAwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj0xMDAsZmlsbD1OYU4pLAogICAgICAgICBhYnNfZGVsdGFfYWZfNTAwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj01MDAsZmlsbD1OYU4pCiAgICAgICAgICkKZGF0YTMKYGBgCgpwbG90IG9uZSBwZXIgY2hyb21vc29tZQoKYGBge3J9CmZvciAoY2hyIGluIGNocm9tLm5hbWVzKSB7CiAgZGF0YTMgJT4lIGZpbHRlcihzdHJfZGV0ZWN0KENIUk9NLGNocikpICU+JQogICAgZ2dwbG90KGFlcyh4PVBPUyx5PWFic19kZWx0YV9hZl8xMDApKSArCiAgICBnZW9tX2xpbmUoKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKCJEZWx0YV9hZiIsY2hyLCIucG5nIiksaGVpZ2h0ID0gOCwgd2lkdGggPSAxMikKfQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGF0YTMsIGFlcyh4PVM5NV9EUCkpICsgZ2VvbV9oaXN0b2dyYW0oKQpnZ3Bsb3QoZGF0YTMsIGFlcyh4PVM5Nl9EUCkpICsgZ2VvbV9oaXN0b2dyYW0oKQoKYGBgCgoKIyMgVHJ5IGZpbHRlcmluZyBmb3IgRjIgZ29vZCBzbnBzCgpgYGB7cn0KRjJzbnBzIDwtIHJlYWRfdHN2KCJGaW5hbF9GMl9TTlBfU2l0ZXMudGFiIikKYGBgCgpgYGB7cn0KZGF0YTIuRjIuZmlsdGVyZWQgPC0gc2VtaV9qb2luKGRhdGEyLEYyc25wcykKCmRpbShkYXRhMikKZGltKEYyc25wcykKZGltKGRhdGEyLkYyLmZpbHRlcmVkKQpgYGAKCmBgYHtyfQpkYXRhMy5GMiA8LSBkYXRhMi5GMi5maWx0ZXJlZCAlPiUKICBncm91cF9ieShDSFJPTSkgJT4lCiAgbXV0YXRlKGFic19kZWx0YV9hZiA9IGFicyhTOTVfQU8vUzk1X0RQIC0gUzk2X0FPL1M5Nl9EUCksCiAgICAgICAgIGFic19kZWx0YV9hZl8xMD1yb2xsX21lYW4oYWJzX2RlbHRhX2FmLG49MTAsZmlsbD1OYU4pCiAgICAgICAgICkKZGF0YTMuRjIKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBkYXRhMy5GMiAlPiUgZmlsdGVyKHN0cl9kZXRlY3QoQ0hST00sY2hyKSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHg9UE9TLHk9YWJzX2RlbHRhX2FmXzEwKSkgKwogICAgZ2VvbV9saW5lKCkgKwogICAgZ2d0aXRsZShjaHIpICsKICAgIHlsaW0oMCwxKQogIGdnc2F2ZShzdHJfYygiRGVsdGFfYWZfRjJfZmlsdGVyIixjaHIsIi5wbmciKSxoZWlnaHQgPSA4LCB3aWR0aCA9IDEyKQp9CmBgYAoKIyMgZmlsdGVyIG9uIDUwNQoKYGBge3J9CnNucHMuNTA1IDwtIHJlYWRfY3N2KCIuLi9raWF0L0JyZWVkaW5nX1ByZWRpY3Rpb25fQXBwL0RhdGEvNTA1LzUwNV9HZW5vdHlwZV8xNzBLX1NOUHMuY3N2Lmd6IikgJT4lCiAgc2VsZWN0KENIUk9NLFBPUykKaGVhZChzbnBzLjUwNSkKYGBgCgpgYGB7cn0KZGF0YTIuNTA1LmZpbHRlcmVkIDwtIHNlbWlfam9pbihkYXRhMixzbnBzLjUwNSkKCmRpbShkYXRhMikKZGltKHNucHMuNTA1KQpkaW0oZGF0YTIuNTA1LmZpbHRlcmVkKQpgYGAKCmBgYHtyfQpkYXRhMy41MDUgPC0gZGF0YTIuNTA1LmZpbHRlcmVkICU+JQogIGdyb3VwX2J5KENIUk9NKSAlPiUKICBtdXRhdGUoYWJzX2RlbHRhX2FmID0gYWJzKFM5NV9BTy9TOTVfRFAgLSBTOTZfQU8vUzk2X0RQKSwKICAgICAgICAgYWJzX2RlbHRhX2FmXzEwPXJvbGxfbWVhbihhYnNfZGVsdGFfYWYsbj0xMCxmaWxsPU5hTikKICAgICAgICAgKQpkYXRhMy41MDUKYGBgCgpgYGB7cn0KZm9yIChjaHIgaW4gY2hyb20ubmFtZXMpIHsKICBkYXRhMy41MDUgJT4lIGZpbHRlcihzdHJfZGV0ZWN0KENIUk9NLGNocikpICU+JQogICAgZ2dwbG90KGFlcyh4PVBPUyx5PWFic19kZWx0YV9hZl8xMCkpICsKICAgIGdlb21fcG9pbnQoYWVzKHk9YWJzX2RlbHRhX2FmKSxzaGFwZT0xKSArCiAgICBnZW9tX2xpbmUoY29sb3I9ImJsdWUiKSArCiAgICBnZ3RpdGxlKGNocikgKwogICAgeWxpbSgwLDEpCiAgZ2dzYXZlKHN0cl9jKCJEZWx0YV9hZl81MDVfZmlsdGVyIixjaHIsIi5wbmciKSxoZWlnaHQgPSA4LCB3aWR0aCA9IDEyKQp9CmBgYA==